{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DRL-Based Cache Replacement Policy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from cache.Cache import Cache\n",
    "from agents.DQNAgent import DQNAgent\n",
    "from cache.DataLoader import DataLoaderZipf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cache Model Parameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Auto-generated zipf simulation data\n",
    "dataloader = DataLoaderZipf(5000, 10000, 1.3, num_progs=100)\n",
    "\n",
    "# Cache size: 5, 10, 50, ...\n",
    "cache_size = 50\n",
    "\n",
    "# Various combination of features\n",
    "base_features = ('Base',)\n",
    "base_UT_features = ('Base', 'UT')\n",
    "base_UT_CT_features = ('Base', 'UT', 'CT')\n",
    "\n",
    "# Our reward function\n",
    "our_reward = dict(name='our'\n",
    "    , alpha=0.5, psi=10\n",
    "    , mu=1, beta=0.3\n",
    ")\n",
    "\n",
    "# Zhong et. al. reward function\n",
    "zhong_reward = dict(name='zhong'\n",
    "    , short_reward=1.0\n",
    "    , long_span=100\n",
    "    , beta=0.5\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Create Cache Environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "env = Cache(dataloader, cache_size\n",
    "    , feature_selection=base_features\n",
    "    , reward_params=our_reward\n",
    "    , allow_skip=False\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Setup DRL Agent"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "RL = DQNAgent(env.n_actions, env.n_features,\n",
    "    learning_rate=0.01,\n",
    "    reward_decay=0.9,\n",
    "\n",
    "    # Epsilon greedy\n",
    "    e_greedy_min=(0.0, 0.1),\n",
    "    e_greedy_max=(0.2, 0.8),\n",
    "    e_greedy_init=(0.1, 0.5),\n",
    "    e_greedy_increment=(0.005, 0.01),\n",
    "    e_greedy_decrement=(0.005, 0.001),\n",
    "\n",
    "    history_size=50,\n",
    "    dynamic_e_greedy_iter=25,\n",
    "    reward_threshold=3,\n",
    "    explore_mentor = 'LRU',\n",
    "\n",
    "    replace_target_iter=100,\n",
    "    memory_size=10000,\n",
    "    batch_size=128,\n",
    "\n",
    "    output_graph=False,\n",
    "    verbose=0\n",
    ")    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Learning Procedure"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "step = 0\n",
    "for episode in range(100):\n",
    "    # initial observation\n",
    "    observation = env.reset()\n",
    "\n",
    "    while True:\n",
    "        # agent choose action based on observation\n",
    "        action = RL.choose_action(observation)\n",
    "\n",
    "        # agent take action and get next observation and reward\n",
    "        observation_, reward = env.step(action)\n",
    "\n",
    "        # break while loop when end of this episode\n",
    "        if env.hasDone():\n",
    "            break\n",
    "\n",
    "        RL.store_transition(observation, action, reward, observation_)\n",
    "\n",
    "        if (step > 20) and (step % 5 == 0):\n",
    "            RL.learn()\n",
    "\n",
    "        # swap observation\n",
    "        observation = observation_\n",
    "\n",
    "        # report every 100 step\n",
    "        if step % 100 == 0:\n",
    "            mr = env.miss_rate()\n",
    "            print(\"Episode=%d, Step=%d: Accesses=%d, Misses=%d, MissRate=%f\"\n",
    "                % (episode, step, env.total_count, env.miss_count, mr)\n",
    "            )\n",
    "\n",
    "        step += 1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
